From 5d9878bd24850184fe67d36b1e6002fdefe57e49 Mon Sep 17 00:00:00 2001 From: "awilliam@xenbuild.aw" Date: Sun, 8 Oct 2006 18:55:12 -0600 Subject: [PATCH] [IA64] per vcpu vhpt Implement per vcpu vhpt option. Allocate VHPT per vcpu. Added compile time option, xen_ia64_pervcpu_vhpt=y, to enable it. Its default is on. Added xen boot time option, pervcpu_vhpt=0, to disable it. This patch focuses on vcpu migration between physical cpus because vcpus are heavily migrated with the credit scheduler. This patch tries to reduce vTLB flushes when a vcpu is migrated. Signed-off-by: Isaku Yamahata --- xen/arch/ia64/Rules.mk | 4 + xen/arch/ia64/vmx/vmx_entry.S | 2 +- xen/arch/ia64/xen/domain.c | 45 ++++++++-- xen/arch/ia64/xen/regionreg.c | 2 +- xen/arch/ia64/xen/vhpt.c | 149 +++++++++++++++++++++++++------- xen/include/asm-ia64/domain.h | 17 ++++ xen/include/asm-ia64/vhpt.h | 37 +++++++- xen/include/asm-ia64/xenkregs.h | 3 +- 8 files changed, 215 insertions(+), 44 deletions(-) diff --git a/xen/arch/ia64/Rules.mk b/xen/arch/ia64/Rules.mk index e11a791758..9d943ffe31 100644 --- a/xen/arch/ia64/Rules.mk +++ b/xen/arch/ia64/Rules.mk @@ -6,6 +6,7 @@ HAS_VGA := y VALIDATE_VT ?= n no_warns ?= n xen_ia64_expose_p2m ?= y +xen_ia64_pervcpu_vhpt ?= y ifneq ($(COMPILE_ARCH),$(TARGET_ARCH)) CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux- @@ -40,6 +41,9 @@ endif ifeq ($(xen_ia64_expose_p2m),y) CFLAGS += -DCONFIG_XEN_IA64_EXPOSE_P2M endif +ifeq ($(xen_ia64_pervcpu_vhpt),y) +CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT +endif ifeq ($(no_warns),y) CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized endif diff --git a/xen/arch/ia64/vmx/vmx_entry.S b/xen/arch/ia64/vmx/vmx_entry.S index 53b00d9019..fa2a53670f 100644 --- a/xen/arch/ia64/vmx/vmx_entry.S +++ b/xen/arch/ia64/vmx/vmx_entry.S @@ -669,7 +669,7 @@ GLOBAL_ENTRY(vmx_switch_rr7) // re-pin mappings for guest_vhpt - mov r24=IA64_TR_PERVP_VHPT + mov r24=IA64_TR_VHPT movl r25=PAGE_KERNEL ;; or loc5 = r25,loc5 // construct PA | page 
properties diff --git a/xen/arch/ia64/xen/domain.c b/xen/arch/ia64/xen/domain.c index d489299768..4414e0ef45 100644 --- a/xen/arch/ia64/xen/domain.c +++ b/xen/arch/ia64/xen/domain.c @@ -69,6 +69,16 @@ DEFINE_PER_CPU(int *, current_psr_ic_addr); #include +static void +ia64_disable_vhpt_walker(void) +{ + // disable VHPT. ia64_new_rr7() might cause VHPT + // fault without this because it flushes dtr[IA64_TR_VHPT] + // (VHPT_SIZE_LOG2 << 2) is just to avoid + // Reserved Register/Field fault. + ia64_set_pta(VHPT_SIZE_LOG2 << 2); +} + static void flush_vtlb_for_context_switch(struct vcpu* vcpu) { int cpu = smp_processor_id(); @@ -92,8 +102,10 @@ static void flush_vtlb_for_context_switch(struct vcpu* vcpu) if (VMX_DOMAIN(vcpu)) { // currently vTLB for vt-i domian is per vcpu. // so any flushing isn't needed. + } else if (HAS_PERVCPU_VHPT(vcpu->domain)) { + // nothing to do } else { - vhpt_flush(); + local_vhpt_flush(); } local_flush_tlb_all(); perfc_incrc(flush_vtlb_for_context_switch); @@ -111,9 +123,9 @@ void schedule_tail(struct vcpu *prev) current->processor); } else { ia64_set_iva(&ia64_ivt); - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); + ia64_disable_vhpt_walker(); load_region_regs(current); + ia64_set_pta(vcpu_pta(current)); vcpu_load_kernel_regs(current); __ia64_per_cpu_var(current_psr_i_addr) = &current->domain-> shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask; @@ -127,7 +139,6 @@ void schedule_tail(struct vcpu *prev) void context_switch(struct vcpu *prev, struct vcpu *next) { uint64_t spsr; - uint64_t pta; local_irq_save(spsr); @@ -164,9 +175,9 @@ void context_switch(struct vcpu *prev, struct vcpu *next) nd = current->domain; if (!is_idle_domain(nd)) { - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); + ia64_disable_vhpt_walker(); load_region_regs(current); + ia64_set_pta(vcpu_pta(current)); vcpu_load_kernel_regs(current); vcpu_set_next_timer(current); if (vcpu_timer_expired(current)) @@ 
-180,8 +191,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) * walker. Then all accesses happen within idle context will * be handled by TR mapping and identity mapping. */ - pta = ia64_get_pta(); - ia64_set_pta(pta & ~VHPT_ENABLED); + ia64_disable_vhpt_walker(); __ia64_per_cpu_var(current_psr_i_addr) = NULL; __ia64_per_cpu_var(current_psr_ic_addr) = NULL; } @@ -270,6 +280,13 @@ struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id) if (!d->arch.is_vti) { int order; int i; + // vti domain has its own vhpt policy. + if (HAS_PERVCPU_VHPT(d)) { + if (pervcpu_vhpt_alloc(v) < 0) { + free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER); + return NULL; + } + } /* Create privregs page only if not VTi. */ order = get_order_from_shift(XMAPPEDREGS_SHIFT); @@ -312,6 +329,8 @@ struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id) void relinquish_vcpu_resources(struct vcpu *v) { + if (HAS_PERVCPU_VHPT(v->domain)) + pervcpu_vhpt_free(v); if (v->arch.privregs != NULL) { free_xenheap_pages(v->arch.privregs, get_order_from_shift(XMAPPEDREGS_SHIFT)); @@ -347,6 +366,11 @@ static void init_switch_stack(struct vcpu *v) memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96); } +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +static int opt_pervcpu_vhpt = 1; +integer_param("pervcpu_vhpt", opt_pervcpu_vhpt); +#endif + int arch_domain_create(struct domain *d) { int i; @@ -361,6 +385,11 @@ int arch_domain_create(struct domain *d) if (is_idle_domain(d)) return 0; +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt; + DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n", + __func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt); +#endif d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT)); if (d->shared_info == NULL) goto fail_nomem; diff --git a/xen/arch/ia64/xen/regionreg.c b/xen/arch/ia64/xen/regionreg.c index 58c89201fb..612aced105 100644 --- a/xen/arch/ia64/xen/regionreg.c +++ b/xen/arch/ia64/xen/regionreg.c @@ 
-260,7 +260,7 @@ int set_one_rr(unsigned long rr, unsigned long val) } else if (rreg == 7) { ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info, v->arch.privregs, v->domain->arch.shared_info_va, - __get_cpu_var(vhpt_paddr)); + vcpu_vhpt_maddr(v)); } else { set_rr(rr,newrrv.rrval); } diff --git a/xen/arch/ia64/xen/vhpt.c b/xen/arch/ia64/xen/vhpt.c index b439ccda42..a8220da1e1 100644 --- a/xen/arch/ia64/xen/vhpt.c +++ b/xen/arch/ia64/xen/vhpt.c @@ -3,6 +3,10 @@ * * Copyright (C) 2004 Hewlett-Packard Co * Dan Magenheimer + * + * Copyright (c) 2006 Isaku Yamahata + * VA Linux Systems Japan K.K. + * per vcpu vhpt support */ #include #include @@ -24,18 +28,32 @@ extern long running_on_sim; DEFINE_PER_CPU (unsigned long, vhpt_paddr); DEFINE_PER_CPU (unsigned long, vhpt_pend); -void vhpt_flush(void) +static void + __vhpt_flush(unsigned long vhpt_maddr) { - struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr)); + struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr); int i; for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) v->ti_tag = INVALID_TI_TAG; } -static void vhpt_erase(void) +void +local_vhpt_flush(void) +{ + __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr)); +} + +static void +vcpu_vhpt_flush(struct vcpu* v) +{ + __vhpt_flush(vcpu_vhpt_maddr(v)); +} + +static void +vhpt_erase(unsigned long vhpt_maddr) { - struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR; + struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr); int i; for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) { @@ -47,17 +65,6 @@ static void vhpt_erase(void) // initialize cache too??? 
} - -static void vhpt_map(unsigned long pte) -{ - unsigned long psr; - - psr = ia64_clear_ic(); - ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, pte, VHPT_SIZE_LOG2); - ia64_set_psr(psr); - ia64_srlz_i(); -} - void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps) { struct vhpt_lf_entry *vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr); @@ -102,7 +109,7 @@ void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long void vhpt_init(void) { - unsigned long paddr, pte; + unsigned long paddr; struct page_info *page; #if !VHPT_ENABLED return; @@ -122,14 +129,51 @@ void vhpt_init(void) __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1; printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n", paddr, __get_cpu_var(vhpt_pend)); - pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL)); - vhpt_map(pte); - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); - vhpt_erase(); + vhpt_erase(paddr); + // we don't enable VHPT here. + // context_switch() or schedule_tail() does it. } +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +int +pervcpu_vhpt_alloc(struct vcpu *v) +{ + unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2; + + v->arch.vhpt_entries = + (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry); + v->arch.vhpt_page = + alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0); + if (!v->arch.vhpt_page) + return -ENOMEM; + + v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page); + if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1)) + panic("pervcpu_vhpt_init: bad VHPT alignment!\n"); + + v->arch.pta.val = 0; // to zero reserved bits + v->arch.pta.ve = 1; // enable vhpt + v->arch.pta.size = VHPT_SIZE_LOG2; + v->arch.pta.vf = 1; // long format + //v->arch.pta.base = __va(v->arch.vhpt_maddr) >> 15; + v->arch.pta.base = VHPT_ADDR >> 15; + + vhpt_erase(v->arch.vhpt_maddr); + smp_mb(); // per vcpu vhpt may be used by another physical cpu. 
+ return 0; +} +void +pervcpu_vhpt_free(struct vcpu *v) +{ + free_domheap_pages(v->arch.vhpt_page, VHPT_SIZE_LOG2 - PAGE_SHIFT); +} +#endif + +// SMP: we can't assume v == current, vcpu might move to another physical cpu. +// So memory barrier is necessary. +// if we can guranttee that vcpu can run on only this physical cpu +// (e.g. vcpu == current), smp_mb() is unnecessary. void vcpu_flush_vtlb_all(struct vcpu *v) { if (VMX_DOMAIN(v)) { @@ -144,9 +188,14 @@ void vcpu_flush_vtlb_all(struct vcpu *v) /* First VCPU tlb. */ vcpu_purge_tr_entry(&PSCBX(v,dtlb)); vcpu_purge_tr_entry(&PSCBX(v,itlb)); + smp_mb(); /* Then VHPT. */ - vhpt_flush(); + if (HAS_PERVCPU_VHPT(v->domain)) + vcpu_vhpt_flush(v); + else + local_vhpt_flush(); + smp_mb(); /* Then mTLB. */ local_flush_tlb_all(); @@ -176,6 +225,13 @@ void domain_flush_vtlb_all (void) if (v->processor == cpu) vcpu_flush_vtlb_all(v); else + // SMP: it is racy to reference v->processor. + // vcpu scheduler may move this vcpu to another + // physicall processor, and change the value + // using plain store. + // We may be seeing the old value of it. + // In such case, flush_vtlb_for_context_switch() + // takes care of mTLB flush. smp_call_function_single(v->processor, __vcpu_flush_vtlb_all, v, 1, 1); @@ -183,24 +239,42 @@ void domain_flush_vtlb_all (void) perfc_incrc(domain_flush_vtlb_all); } -static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range) +// Callers may need to call smp_mb() before/after calling this. +// Be carefull. +static void +__flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range) { - void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu)); + void *vhpt_base = __va(vhpt_maddr); while ((long)addr_range > 0) { /* Get the VHPT entry. 
*/ unsigned int off = ia64_thash(vadr) - VHPT_ADDR; - volatile struct vhpt_lf_entry *v; - v = vhpt_base + off; + struct vhpt_lf_entry *v = vhpt_base + off; v->ti_tag = INVALID_TI_TAG; addr_range -= PAGE_SIZE; vadr += PAGE_SIZE; } } +static void +cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range) +{ + __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range); +} + +static void +vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range) +{ + __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range); +} + void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range) { - cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range); + if (HAS_PERVCPU_VHPT(current->domain)) + vcpu_flush_vhpt_range(current, vadr, 1UL << log_range); + else + cpu_flush_vhpt_range(current->processor, + vadr, 1UL << log_range); ia64_ptcl(vadr, log_range << 2); ia64_srlz_i(); perfc_incrc(vcpu_flush_tlb_vhpt_range); @@ -233,8 +307,18 @@ void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range) if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) continue; - /* Invalidate VHPT entries. */ - cpu_flush_vhpt_range (v->processor, vadr, addr_range); + if (HAS_PERVCPU_VHPT(d)) { + vcpu_flush_vhpt_range(v, vadr, addr_range); + } else { + // SMP: it is racy to reference v->processor. + // vcpu scheduler may move this vcpu to another + // physicall processor, and change the value + // using plain store. + // We may be seeing the old value of it. + // In such case, flush_vtlb_for_context_switch() + /* Invalidate VHPT entries. */ + cpu_flush_vhpt_range(v->processor, vadr, addr_range); + } } // ptc.ga has release semantics. @@ -246,7 +330,7 @@ void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range) static void flush_tlb_vhpt_all (struct domain *d) { /* First VHPT. */ - vhpt_flush (); + local_vhpt_flush (); /* Then mTLB. 
*/ local_flush_tlb_all (); @@ -255,7 +339,10 @@ static void flush_tlb_vhpt_all (struct domain *d) void domain_flush_tlb_vhpt(struct domain *d) { /* Very heavy... */ - on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); + if (HAS_PERVCPU_VHPT(d) /* || VMX_DOMAIN(v) */) + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); + else + on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); cpus_clear (d->domain_dirty_cpumask); } diff --git a/xen/include/asm-ia64/domain.h b/xen/include/asm-ia64/domain.h index 99cfa7415e..34e24e5a85 100644 --- a/xen/include/asm-ia64/domain.h +++ b/xen/include/asm-ia64/domain.h @@ -87,6 +87,9 @@ struct arch_domain { unsigned long flags; struct { unsigned int is_vti : 1; +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + unsigned int has_pervcpu_vhpt : 1; +#endif }; }; @@ -142,6 +145,13 @@ struct arch_domain { (sizeof(vcpu_info_t) * (v)->vcpu_id + \ offsetof(vcpu_info_t, evtchn_upcall_mask)) +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +#define HAS_PERVCPU_VHPT(d) ((d)->arch.has_pervcpu_vhpt) +#else +#define HAS_PERVCPU_VHPT(d) (0) +#endif + + struct arch_vcpu { /* Save the state of vcpu. This is the first entry to speed up accesses. */ @@ -193,6 +203,13 @@ struct arch_vcpu { struct timer hlt_timer; struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */ +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + PTA pta; + unsigned long vhpt_maddr; + struct page_info* vhpt_page; + unsigned long vhpt_entries; +#endif + #define INVALID_PROCESSOR INT_MAX int last_processor; }; diff --git a/xen/include/asm-ia64/vhpt.h b/xen/include/asm-ia64/vhpt.h index cb4fc30462..c59d8fd635 100644 --- a/xen/include/asm-ia64/vhpt.h +++ b/xen/include/asm-ia64/vhpt.h @@ -37,11 +37,46 @@ extern void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps); extern void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps); -void vhpt_flush(void); +void local_vhpt_flush(void); /* Currently the VHPT is allocated per CPU. 
*/ DECLARE_PER_CPU (unsigned long, vhpt_paddr); DECLARE_PER_CPU (unsigned long, vhpt_pend); +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +#if !VHPT_ENABLED +#error "VHPT_ENABLED must be set for CONFIG_XEN_IA64_PERVCPU_VHPT" +#endif +#endif + +#include +int pervcpu_vhpt_alloc(struct vcpu *v); +void pervcpu_vhpt_free(struct vcpu *v); +static inline unsigned long +vcpu_vhpt_maddr(struct vcpu* v) +{ +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + if (HAS_PERVCPU_VHPT(v->domain)) + return v->arch.vhpt_maddr; +#endif + +#if 0 + // referencing v->processor is racy. + return per_cpu(vhpt_paddr, v->processor); +#endif + BUG_ON(v != current); + return __get_cpu_var(vhpt_paddr); +} + +static inline unsigned long +vcpu_pta(struct vcpu* v) +{ +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + if (HAS_PERVCPU_VHPT(v->domain)) + return v->arch.pta.val; +#endif + return VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED; +} + #endif /* !__ASSEMBLY */ #endif diff --git a/xen/include/asm-ia64/xenkregs.h b/xen/include/asm-ia64/xenkregs.h index dcfaf65d6b..d2dcd2bc84 100644 --- a/xen/include/asm-ia64/xenkregs.h +++ b/xen/include/asm-ia64/xenkregs.h @@ -7,8 +7,7 @@ #define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */ #define IA64_TR_VHPT 4 /* dtr4: vhpt */ #define IA64_TR_MAPPED_REGS 5 /* dtr5: vcpu mapped regs */ -#define IA64_TR_PERVP_VHPT 6 -#define IA64_DTR_GUEST_KERNEL 7 +#define IA64_DTR_GUEST_KERNEL 6 #define IA64_ITR_GUEST_KERNEL 2 /* Processor status register bits: */ #define IA64_PSR_VM_BIT 46 -- 2.30.2